source("Scripts/00_setup.R")
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   0.8.3     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: maps
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:plotly':
## 
##     rename
## The following object is masked from 'package:data.table':
## 
##     melt
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:chron':
## 
##     days, hours, minutes, seconds, years
## The following object is masked from 'package:reshape':
## 
##     stamp
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday,
##     week, yday, year
## The following object is masked from 'package:base':
## 
##     date
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:lubridate':
## 
##     here
## The following objects are masked from 'package:reshape':
## 
##     rename, round_any
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:maps':
## 
##     ozone
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
## 
## Attaching package: 'foreach'
## The following object is masked from 'package:chron':
## 
##     times
## The following objects are masked from 'package:purrr':
## 
##     accumulate, when
## Loading required package: iterators
## 
## Attaching package: 'cluster'
## The following object is masked from 'package:maps':
## 
##     votes.repub
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin

Loading data

load(file = paste0(IO$output_data, "users.Rdata"), verbose = TRUE)
## Loading objects:
##   users
load(file = paste0(IO$output_data, "cycles.Rdata"), verbose = TRUE)
## Loading objects:
##   cycles

Table 1

# average app usage duration
agg_start = aggregate(cycle_start ~ user_id, cycles, min)
colnames(agg_start) = c("user_id", "start_first_cycle")
agg_end = aggregate(cycle_start+cycle_length ~ user_id, cycles, max)
colnames(agg_end) = c("user_id", "end_last_cycle")
agg = merge(agg_start, agg_end)
m = match(users$user_id, agg$user_id)
agg = agg[m,]
agg$app_duration_in_days = as.numeric(agg$end_last_cycle - agg$start_first_cycle)
agg$app_duration_in_years = agg$app_duration_in_days / 365
ju = which(users$BC != "unclear")
jc = which(cycles$BC != "unclear")

# Number of users
df = data.frame(field = "Number of users",original_dataset = nrow(users), filtered_dataset  = sum(users$BC != "unclear"))

# Number of cycles
df = rbind(df,
           data.frame(field = "Number of cycles",original_dataset = nrow(cycles), filtered_dataset  = sum(cycles$BC != "unclear")))

# Number of breast tenderness symptoms
df = rbind(df,
           data.frame(field = "Number of breast tenderness symptoms",original_dataset = sum(cycles$n_TB), filtered_dataset  = sum(cycles$n_TB[jc])))



# average app usage duration
df = rbind(df,
           data.frame(field = "Average app usage duration (in year)",
                      original_dataset = round(mean(agg$app_duration_in_years),2), 
                      filtered_dataset  = round(mean(agg$app_duration_in_years[ju]),2)))
df = rbind(df,
           data.frame(field = "Average app usage duration (sd)",
                      original_dataset = round(sd(agg$app_duration_in_years),2), 
                      filtered_dataset  = round(sd(agg$app_duration_in_years[ju]),2)))


# average app number of cycles
df = rbind(df,
           data.frame(field = "Average # of cycles per users",
                      original_dataset = round(mean(users$n_cycles),2), 
                      filtered_dataset  = round(mean(users$n_cycles[ju]),2)))
df = rbind(df,
           data.frame(field = "Average # of cycles (sd)",
                      original_dataset = round(sd(users$n_cycles),2), 
                      filtered_dataset  = round(sd(users$n_cycles[ju]),2)))

# average  number of breast tenderness per users
df = rbind(df,
           data.frame(field = "Average # of breast tenderness per users",
                      original_dataset = round(mean(users$n_tender_breasts),2), 
                      filtered_dataset  = round(mean(users$n_tender_breasts[ju]),2)))

df = rbind(df,
           data.frame(field = "Median # of breast tenderness",
                      original_dataset = round(median(users$n_tender_breasts),2), 
                      filtered_dataset  = round(median(users$n_tender_breasts[ju]),2)))

df = rbind(df,
           data.frame(field = "Average # of breast tenderness (sd)",
                      original_dataset = round(sd(users$n_tender_breasts),2), 
                      filtered_dataset  = round(sd(users$n_tender_breasts[ju]),2)))

# average number of breast tenderness per cycles
df = rbind(df,
           data.frame(field = "Average # of breast tenderness per cycle",
                      original_dataset = round(mean(cycles$n_TB),2), 
                      filtered_dataset  = round(mean(cycles$n_TB[jc]),2)))

df = rbind(df,
           data.frame(field = "Average # of breast tenderness per cycle (sd) ",
                      original_dataset = round(sd(cycles$n_TB),2), 
                      filtered_dataset  = round(sd(cycles$n_TB[jc]),2)))


# percentage of cycles per BC (original)
table_BC_CLUE_o = table(cycles$birth_control_CLUE)
table_BC_CLUE_o_perc = round(100*table_BC_CLUE_o/nrow(cycles),2)

table_BC_CLUE_f = table(cycles$birth_control_CLUE[jc])
table_BC_CLUE_f_perc = round(100*table_BC_CLUE_f/length(jc),2)

df = rbind(df,
           data.frame(field = paste0("% of cycles per BC as logged by the users (",names(table_BC_CLUE_o_perc),")"),
                      original_dataset = as.numeric(table_BC_CLUE_o_perc), 
                      filtered_dataset  = as.numeric(table_BC_CLUE_f_perc)))

# percentage of cycles per BC (re-assigned)

table_BC_o = table(cycles$BC)
table_BC_o_perc = round(100*table_BC_o/nrow(cycles),2)

table_BC_f = table(cycles$BC[jc])
table_BC_f_perc = round(100*table_BC_f/length(jc),2)

df = rbind(df,
           data.frame(field = paste0("% of cycles per BC as re-assigned (",names(table_BC_o_perc),")"),
                      original_dataset = as.numeric(table_BC_o_perc), 
                      filtered_dataset  = c(as.numeric(table_BC_f_perc),0)))



df
##                                                         field
## 1                                             Number of users
## 2                                            Number of cycles
## 3                        Number of breast tenderness symptoms
## 4                        Average app usage duration (in year)
## 5                             Average app usage duration (sd)
## 6                               Average # of cycles per users
## 7                                    Average # of cycles (sd)
## 8                    Average # of breast tenderness per users
## 9                               Median # of breast tenderness
## 10                        Average # of breast tenderness (sd)
## 11                   Average # of breast tenderness per cycle
## 12             Average # of breast tenderness per cycle (sd) 
## 13 % of cycles per BC as logged by the users (none / condoms)
## 14           % of cycles per BC as logged by the users (pill)
## 15  % of cycles per BC as logged by the users (did not enter)
## 16         % of cycles per BC as re-assigned (none / condoms)
## 17                   % of cycles per BC as re-assigned (pill)
## 18                % of cycles per BC as re-assigned (unclear)
##    original_dataset filtered_dataset
## 1         216484.00        210529.00
## 2        3686977.00       3056348.00
## 3        4530662.00       4132033.00
## 4              1.54             1.55
## 5              0.65             0.65
## 6             17.03            17.15
## 7              7.45             7.48
## 8             20.93            21.27
## 9              8.00             8.00
## 10            36.06            36.43
## 11             1.23             1.35
## 12             2.94             3.12
## 13            28.42            31.50
## 14            39.08            39.29
## 15            32.50            29.21
## 16            48.94            59.04
## 17            33.95            40.96
## 18            17.10             0.00
write.csv(df, file = paste0(IO$tables,"Table_1.csv") , row.names = FALSE)

Supplementary Table 1

table_n = table(cycles$birth_control_CLUE, cycles$BC)
table_perc = round( 100*table_n/apply(table_n, 1, sum) , 2)

rownames(table_n) = paste0(rownames(table_n), ": n cycles")
table_n
##                           
##                            none / condoms   pill unclear
##   none / condoms: n cycles         895215  67507   85295
##   pill: n cycles                   215958 984776  240084
##   did not enter: n cycles          693365 199527  305250
rownames(table_perc) = paste0(rownames(table_perc), ": % cycles")
table_perc
##                           
##                            none / condoms  pill unclear
##   none / condoms: % cycles          85.42  6.44    8.14
##   pill: % cycles                    14.99 68.35   16.66
##   did not enter: % cycles           57.87 16.65   25.48
table_BC = rbind(table_n,table_perc)
o = order(rownames(table_BC), decreasing = TRUE)
table_BC = table_BC[o,]
table_BC
##                          none / condoms      pill   unclear
## pill: n cycles                215958.00 984776.00 240084.00
## pill: % cycles                    14.99     68.35     16.66
## none / condoms: n cycles      895215.00  67507.00  85295.00
## none / condoms: % cycles          85.42      6.44      8.14
## did not enter: n cycles       693365.00 199527.00 305250.00
## did not enter: % cycles           57.87     16.65     25.48
write.csv(table_BC, file = paste0(IO$tables, "Supplementary_Table_BC_table.csv"))

Table 2: users’ features

mean(users$age)
## [1] 20.59862
sd(users$age)
## [1] 4.946749
mean(users$age[ju])
## [1] 20.61776
sd(users$age[ju])
## [1] 4.953888
round(100*table(users$age_cat)/nrow(users),2)
## 
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] 
##    9.14   51.75   23.20   10.42    4.34    1.05    0.11
round(100*table(users$age_cat[ju])/length(ju),2)
## 
## (10,15] (15,20] (20,25] (25,30] (30,35] (35,40] (40,45] 
##    9.10   51.61   23.29   10.47    4.36    1.06    0.11
lu(users$country)
## [1] 24
lu(users$country[ju])
## [1] 24
round(100*sort(table(users$country))/nrow(users),2)
## 
##      Venezuela        Ecuador        Ireland        Belgium         Sweden 
##           0.04           0.05           0.05           0.11           0.16 
##           Peru    Switzerland        Austria       Portugal         Russia 
##           0.17           0.19           0.24           0.52           0.74 
##        Denmark          Chile      Australia      Argentina       Colombia 
##           0.75           0.93           1.56           1.58           1.64 
##          Italy          Spain         Canada        Germany         France 
##           2.32           2.84           3.14           7.29           8.01 
##         Mexico United Kingdom         Brazil  United States 
##           8.37           9.12          20.49          29.71
round(100*sort(table(users$country[ju]))/length(ju),2)
## 
##      Venezuela        Ireland        Ecuador        Belgium         Sweden 
##           0.04           0.05           0.05           0.11           0.16 
##           Peru    Switzerland        Austria       Portugal         Russia 
##           0.17           0.19           0.24           0.53           0.74 
##        Denmark          Chile      Australia      Argentina       Colombia 
##           0.75           0.93           1.53           1.58           1.62 
##          Italy          Spain         Canada        Germany         France 
##           2.34           2.85           3.12           7.30           8.01 
##         Mexico United Kingdom         Brazil  United States 
##           8.42           9.05          20.48          29.74
mean(users$height, na.rm = TRUE)
## [1] 163.4378
sd(users$height, na.rm = TRUE)
## [1] 6.965825
mean(users$height[ju], na.rm = TRUE)
## [1] 163.4301
sd(users$height[ju], na.rm = TRUE)
## [1] 6.964167
mean(users$weight, na.rm = TRUE)
## [1] 61.05458
sd(users$weight, na.rm = TRUE)
## [1] 15.13491
mean(users$weight[ju], na.rm = TRUE)
## [1] 61.02502
sd(users$weight[ju], na.rm = TRUE)
## [1] 15.08613
mean(users$bmi, na.rm = TRUE)
## [1] 22.85278
sd(users$bmi, na.rm = TRUE)
## [1] 5.304415
mean(users$bmi[ju], na.rm = TRUE)
## [1] 22.84302
sd(users$bmi[ju], na.rm = TRUE)
## [1] 5.274114
round(100*sort(table(users$birth_control_o))/nrow(users),2)
## 
##       condoms          none          pill did not enter 
##         10.12         18.25         35.67         35.96
round(100*sort(table(users$birth_control_o[ju]))/length(ju),2)
## 
##       condoms          none did not enter          pill 
##         10.31         18.59         35.31         35.79
round(100*sort(table(users$BC))/nrow(users),2)
## 
##        unclear           both           pill none / condoms 
##           2.75          21.07          26.03          50.15
round(100*sort(table(users$BC[ju]))/length(ju),2)
## 
##           both           pill none / condoms 
##          21.67          26.76          51.57

Demographics (Supplementary viz)

g = ggplot(users, aes(x = age, fill = birth_control))
g + geom_histogram(binwidth = 1) + facet_grid(birth_control ~.)

g = ggplot(users, aes(x = age, fill = country))
g + geom_histogram(binwidth = 1) + facet_wrap(country ~.)

load(file = paste0(IO$out_Rdata, "number_of_users_and_cycles_per_category.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
g = ggplot(agg, aes(y = BC, x= 0,   col = BC)) + 
  facet_grid(bmi_cat ~ age_cat) + 
  geom_text(aes(label = n_users)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.ticks = element_blank(),
        axis.text.x = element_blank()) + 
  guides(color = FALSE, size = FALSE) + xlab("") + ylab("") + 
  ggtitle("Number of users per category of age (horizontal) and bmi (vertical)")

g
## Warning: Removed 16 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_users_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 16 rows containing missing values (geom_text).
g = ggplot(agg, aes(y = BC, x= 0,   col = BC)) + 
  facet_grid(bmi_cat ~ age_cat) + 
  geom_text(aes(label = n_cycles)) + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), 
        axis.ticks = element_blank(),
        axis.text.x = element_blank()) + 
  guides(color = FALSE, size = FALSE) + xlab("") + ylab("") +
  ggtitle("Number of cycles per category of age (horizontal) and bmi (vertical)")
g
## Warning: Removed 51 rows containing missing values (geom_text).

ggsave(filename = paste0(IO$panels, "S1_X_number_of_cycles_per_cat.pdf"), plot = g)
## Saving 7 x 5 in image
## Warning: Removed 51 rows containing missing values (geom_text).

Figure 1: Number of Breast Tenderness per users

ju = which(users$BC %in% as.character(par$BC_dict$name))
g = ggplot(users[ju,], aes(x = n_tender_breasts, fill = BC))+
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5)+xlim(c(-1,20))
g
## Warning: Removed 49439 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

j = which((users$BC %in% as.character(par$BC_dict$name)) & 
            (users$perc_cycles_with_TB>=0) & 
            (users$perc_cycles_with_TB<=1))

g = ggplot(users[j,], aes(x = perc_cycles_with_TB, fill = BC))+
  geom_histogram(binwidth = 0.1, position = "identity", alpha = 0.5)
g

table_BC_BT = table(users$BC[j], round(users$perc_cycles_with_TB[j],1))
table_BC_BT
##                 
##                      0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8
##   none / condoms 15508 10789 11093  8348  8965  8314  8283  7290  9043
##   pill           23834 10241  6815  3493  2846  2012  1648  1133  1364
##                 
##                    0.9     1
##   none / condoms  7646  6801
##   pill             809   859
table_BC_BT_perc = round(100*table_BC_BT/apply(table_BC_BT,1,sum),2)
table_BC_BT_perc
##                 
##                      0   0.1   0.2   0.3   0.4   0.5   0.6   0.7   0.8
##   none / condoms 15.19 10.57 10.87  8.18  8.78  8.14  8.11  7.14  8.86
##   pill           43.29 18.60 12.38  6.34  5.17  3.65  2.99  2.06  2.48
##                 
##                    0.9     1
##   none / condoms  7.49  6.66
##   pill            1.47  1.56
table_BC_BT_perc_df = as.data.frame(table_BC_BT_perc)
colnames(table_BC_BT_perc_df) = c("BC","perc_cycles_with_TB","perc_users")


g = ggplot(table_BC_BT_perc_df, aes(x = perc_cycles_with_TB,y = perc_users, fill = BC))+
  geom_bar(stat = "identity",position = "dodge", alpha = 1)+
  xlab("Fraction of cycles with at least one reported breast tenderness")+
  ylab("% of users")+
  geom_text(data = data.frame(BC = names(table(users$BC[j])),n_users = as.numeric(table(users$BC[j]))),
            aes(col = BC, label = paste("tot # users:",n_users), 
                x = 8,y = 0.6*max(table_BC_BT_perc_df$perc_users)+6*as.numeric(BC)))
g

ggsave(file = paste0(IO$panels, "perc of users vs perc of cycles with at least one BT.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.4*viz$full_width/2,
       scale = viz$scale*0.85)

Figure 1: Examples

#load(paste0(IO$input_data,"days/days_1.Rdata"), verbose = TRUE)

load(file = paste0(IO$out_Rdata,"sub_days_examples_input_data.Rdata"), verbose = TRUE)
## Loading objects:
##   sub_days
days = sub_days

user_ids = unique(days$user_id)

for(user_id in user_ids){
  j = which((days$user_id == user_id)&(!is.na(days$cycle_id_m)) & (days$category != "pill_hbc") & (days$cycle_nb_m %in% 4:8))
  d = days[j,]
  g = ggplot_user_example(d = d, title = FALSE, size_factor = 1.5)
  print(g)
  
  ggsave(filename = paste0(IO$panels, "example_",d$user_id[1],".pdf"), plot = g, width = viz$full_width/2.8, height = viz$full_width/4)
}
rm(days, user_id, user_ids, g, d, j)
## Warning in rm(days, user_id, user_ids, g, d, j): object 'd' not found

Figure 1: Tracking groups

load(file = paste0(IO$out_Rdata,"tracking_behavior_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])
##                 BC         tracking_group tot_n_cycles
## 1   none / condoms     menstrual tracking       368956
## 27            pill     menstrual tracking       220308
## 53  none / condoms pre-menstrual tracking       254472
## 79            pill pre-menstrual tracking        37063
## 105 none / condoms       regular tracking       459701
## 131           pill       regular tracking       700152
## 157 none / condoms      sporadic tracking       684557
## 183           pill      sporadic tracking       263187
agg$SE = agg$fraction_cycles_with_logs_SE

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_logs, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_logs-1.96*SE, ymax = fraction_cycles_with_logs+1.96*SE, fill = BC),
              alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with any log")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)
g

ggsave(file = paste0(IO$panels, "tracking_profile_perc_cycles_with_logs_per_tracking_group_and_BC.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.3*viz$full_width/2,
       scale = viz$scale*1.2)


g_n = ggplot(agg, aes(x = cycleday_m_D, y = avg_n_logs, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_line() + geom_point()+ 
  scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides()+
  xlab("cycleday from 1st day of menstruation") + ylab("average # of logs")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)+
  geom_text(aes(x = -par$D, y = 9.25+ 1*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
            hjust = 0, vjust = 0,show.legend = FALSE)
g_n

ggsave(file = paste0(IO$panels, "tracking_profile_avg_log_per_tracking_group_and_BC.pdf"),
       plot = g_n, 
       width = viz$full_width/2,
       height = 0.4*viz$full_width/2,
       scale = viz$scale*1.2)

Figure 1: Overall tender breasts profiles per tracking groups

load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_tracking_group_and_birth_control.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
unique(agg[,c("BC","tracking_group","tot_n_cycles")])
##                 BC         tracking_group tot_n_cycles
## 1   none / condoms     menstrual tracking       368956
## 27            pill     menstrual tracking       220308
## 53  none / condoms pre-menstrual tracking       254472
## 79            pill pre-menstrual tracking        37063
## 105 none / condoms       regular tracking       459701
## 131           pill       regular tracking       700152
## 157 none / condoms      sporadic tracking       684557
## 183           pill      sporadic tracking       263187
agg$SE = agg$fraction_cycles_with_TB_SE

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = BC), alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$BC , name="BC")+
  scale_fill_manual(values= cols$BC , name="BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported BT")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))+
  facet_grid( . ~ tracking_group)+
  geom_text(aes(x = -par$D, y = 0.27+ 0.03*(match(BC, par$BC_dict$name)-1), label = paste0("n: ",tot_n_cycles)),
            hjust = 0, vjust = 1)
g

ggsave(file = paste0(IO$panels, "symptoms_profile_per_tracking_group_and_BC.pdf"),
       plot = g, 
       width = viz$full_width/2,
       height = 0.3*viz$full_width/2,
       scale = viz$scale*1.2)

Figure 1: Overall tender breasts profiles per age categories and BC for cycles with regular tracking

load(file = paste0(IO$out_Rdata,"TB_overal_pattern_by_age_cat_and_birth_control_for_regular_tracking.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
agg$age_x_BC = interaction(agg$age_cat , agg$BC)
agg$SE = agg$fraction_cycles_with_TB_SE


j = which(agg$age_cat %in% par$age_cat_exclude)
if(length(j)>0){agg = agg[-j,]}
j = which((agg$age_cat == "(35,40]") & (agg$BC == "pill"))
if(length(j)>0){agg = agg[-j,]}

agg = agg[which(agg$tracking_group == "regular tracking"),]

g = ggplot(agg, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = age_x_BC, shape = BC)) + 
  geom_vline(xintercept = 0, col = "gray90", size = 1.5)+
  geom_ribbon(aes(ymin = fraction_cycles_with_TB-1.96*SE, ymax = fraction_cycles_with_TB+1.96*SE, fill = age_x_BC), alpha = 0.5, col = NA)+
  geom_line() + geom_point()+ 
  scale_y_continuous(labels = scales::percent)+ scale_x_continuous(breaks = par$x.axis)+
  scale_colour_manual(values= cols$age_x_BC , name="Age group x BC")+
  scale_fill_manual(values= cols$age_x_BC , name="Age group x BC")+
  guides(col = FALSE, fill = FALSE, shape = FALSE)+
  xlab("cycleday from 1st day of menstruation") + ylab("% cycles with reported TB")+
  theme(legend.position="bottom",legend.background = element_rect(color="gray90", fill = NA))#+
#facet_grid(. ~ tracking_group)
g

ggsave(filename = paste0(IO$panels, "symptoms_profile_by_BC_and_age_cat_for_regular_tracking.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.5*viz$full_width/2,
       scale = viz$scale)
load(file = paste0(IO$out_Rdata, "avg_symptoms_per_user.Rdata"), verbose = TRUE)
## Loading objects:
##   agg
g = ggplot(agg[agg$BC %in% c("pill","none / condoms"),], aes(x = n_TB_by_n_cycles, y = perc_users, fill = BC)) +
  geom_bar(stat = "identity", position = "dodge" )+
  scale_fill_manual(values = cols$BC)+
  xlab("# of reported TB symptoms / # of cycles")+ 
  ylab("% of users") + guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "1_avg_TB_per_user.pdf"), plot = g, width = viz$full_width/4.5, height = viz$full_width/4.5, scale = viz$scale)
load(file = paste0(IO$out_Rdata, "n_TB_per_cycle.Rdata"), verbose=TRUE)
## Loading objects:
##   agg
g = ggplot(agg[agg$BC != "unclear",], aes(x = tender_breasts_ul, y = perc_cycles, fill = BC)) +
  geom_bar(stat = "identity", position = "dodge") + 
  scale_fill_manual(values = cols$BC3) + guides(fill = FALSE)+
  xlab("# of reported Tender Breast symptoms per cycle")+ ylab("% of cycles")

g

ggsave(filename = paste0(IO$panels, "1_number_of_TB_per_cycle.pdf"), plot = g, width = viz$full_width/4, height = viz$full_width/4.5, scale = viz$scale)

Figure S2: Predictor of Breast Tenderness.

load(file = paste0(IO$out_Rdata, "symptoms_predictors.Rdata"), verbose=TRUE)
## Loading objects:
##   pred
g_z = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(input ~ .)+
  guides(col = FALSE)
g_z

g_z = g_z + guides(linetype = FALSE)


g_p = ggplot(pred[!grepl("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(input ~ .)+
  guides(col = FALSE)
g_p

g_p = g_p + guides(linetype = FALSE)



g_z_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = z_value, col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(country_name ~ .)+
  guides(col = FALSE)
g_z_countries

g_z_countries = g_z_countries + guides(linetype = FALSE)


g_p_countries = ggplot(pred[grep("country",pred$input),], aes(x = cycleday_m_D, y = -log10(q_value), col = input, linetype = tracking_cluster)) + 
  geom_vline(xintercept = 0, size = 2, col = "gray90")+
  geom_vline(xintercept = -14, size = 4, col = "gray90", alpha = 0.5)+
  geom_hline(yintercept = 0, size = 0.1)+
  geom_line() + 
  scale_x_continuous(breaks = par$x.axis)+
  facet_wrap(country_name ~ .)+
  guides(col = FALSE)
g_p_countries

g_p_countries = g_p_countries + guides(linetype = FALSE)




ggsave(filename = paste0(IO$panels, "predictors_z.pdf"), 
       plot = g_z, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)



ggsave(filename = paste0(IO$panels, "predictors_z_countries.pdf"), 
       plot = g_z_countries, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)


ggsave(filename = paste0(IO$panels, "predictors_p.pdf"), 
       plot = g_p, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)



ggsave(filename = paste0(IO$panels, "predictors_p_countries.pdf"), 
       plot = g_p_countries, 
       width = viz$full_width/2, 
       height = 0.75*viz$full_width/2, 
       scale = viz$scale)

Figure 2: Evolution in time

load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time.Rdata"), verbose = TRUE)
## Loading objects:
##   fraction_TB
load(file = paste0(IO$out_Rdata,"evolution_of_symptom_tracking_in_time_min_n_cycles.Rdata"), verbose = TRUE)
## Loading objects:
##   min_n_cycles
fraction_TB$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",fraction_TB$init_TB_group))
min_n_cycles$init_TB_group_text = factor(paste0("avg. #BT in cycles 1-5 = ",min_n_cycles$init_TB_group))

fraction_TB$BC_x_init_TB_group =  interaction(fraction_TB$init_TB_group, fraction_TB$BC)
min_n_cycles$BC_x_init_TB_group =  interaction(min_n_cycles$init_TB_group, min_n_cycles$BC)


g = ggplot(fraction_TB, aes(x = cycle_nb_m_rel, y = median_n_TB, col = BC_x_init_TB_group, fill = BC_x_init_TB_group)) + 
  geom_line() + 
  geom_line(aes(y = avg_n_TB), linetype = 3) + 
  geom_ribbon(aes(ymin = q_05_n_TB , ymax = q_95_n_TB), alpha = 0.2, col = NA)+
  geom_ribbon(aes(ymin = q_25_n_TB , ymax = q_75_n_TB), alpha = 0.2, col = NA)+
  geom_text(data = min_n_cycles, aes(label = paste0("min # = ",min_n_cycles)), x = 0.5, y = 0.95*max(fraction_TB$q_95_n_TB), size = 3, vjust = 0, hjust = 0)+
  facet_grid(BC  ~ init_TB_group_text )+
  scale_y_continuous(breaks = seq(0,21, by = 3), minor_breaks = 0:21)+
  scale_color_manual(values = cols$BC_x_init_TB_group) + scale_fill_manual(values = cols$BC_x_init_TB_group)+ 
  guides(col = FALSE, fill = FALSE) +
  xlab("Cycle number") + ylab("# of reported BT")
g

ggsave(filename = paste0(IO$panels, "evolution_in_time.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.5*viz$full_width/2,
       scale = viz$scale)

Figure 2: Consistency of Symptoms

load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
##   sel_d
sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]


if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)){
  
  g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent, 
                                   facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  
  g_average = ggplot_imputed_TB(sel_d = sel_d_average, 
                                facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  
  
  g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent, 
                                     facet_grid = c("BC","init_TB_group","user_id"), cycle_id = FALSE, col = "user_id")
  
  grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
}

load(file = paste0(IO$out_Rdata,"consistent_and_inconsistent_users.Rdata"), verbose = TRUE)
## Loading objects:
##   sel_d
user_ids = 
  c("586e33d674b46555254e908ec42d6771ed294bcd", "ff484ef33fd2c29086617ed63a9814571b86f15d", 
    "677332825213f1f0cb1e6831ead1fc787cf01663", "8714aad3100409673b8762775e474aff17321df4", 
    "40620830924a2f02935e3a6c6102f84e5de35fce","6ef947aeb9cfe0468bd1efd26b8a10dca0a1e645")

sel_d = sel_d[which(sel_d$user_id %in% user_ids),]

sel_d_consistent = sel_d[sel_d$consistent == "consistent", ]
sel_d_average = sel_d[sel_d$consistent == "average", ]
sel_d_inconsistent = sel_d[sel_d$consistent == "inconsistent", ]

if((nrow(sel_d_inconsistent)>0)&(nrow(sel_d_consistent)>0)&(nrow(sel_d_average)>0)){
  
  g_consistent = ggplot_imputed_TB(sel_d = sel_d_consistent, 
                                   facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  g_average = ggplot_imputed_TB(sel_d = sel_d_average, 
                                facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  
  g_inconsistent = ggplot_imputed_TB(sel_d = sel_d_inconsistent, 
                                     facet_grid = c("BC"), cycle_id = FALSE, col = "BC")+
    scale_color_manual(values = cols$BC)+ylab("")+theme(strip.text = element_blank())
  
  
  grid.arrange(g_inconsistent, g_average, g_consistent, nrow = 1)
  g_consistency = arrangeGrob(g_inconsistent, g_average, g_consistent, nrow = 1, widths = c(1,1,1))
  g_consistency
  
  ggsave(filename = paste0(IO$panels, "consistency_examples.pdf"), 
         plot = g_consistency, 
         width = viz$full_width/2, 
         height = 0.33*viz$full_width/2,
         scale = viz$scale*1.7)
  
  
}

load(file = paste0(IO$out_Rdata,"consistency_summary.Rdata"), verbose = TRUE)
## Loading objects:
##   df
df$BC = factor(df$BC, levels = c(as.character(par$BC_dict$name),"both"))
df$BC_x_init_TB_group =  interaction(df$init_TB_group, df$BC)


g = ggplot(df, aes(x = consistency, y = n_users, fill = BC_x_init_TB_group)) + 
  geom_area(position = "identity", alpha = 0.75) + 
  facet_grid(BC ~ . , scale = "free_y")+
  scale_fill_manual(values = cols$BC_x_init_TB_group2)+
  guides(fill = FALSE)
g

ggsave(filename = paste0(IO$panels, "consistency_summary.pdf"), 
       plot = g, 
       width = viz$full_width/2.5, 
       height = 0.6*viz$full_width/2.5,
       scale = viz$scale)

Figure 2: Phenotyping

for(bc in c("NC","pill")){
  i = (bc == "pill")+1
  BC = par$BC_dict$name[i]
  load(file = paste0(IO$tmp_data,"user_phenotyping_",bc,".Rdata"), verbose = TRUE)
  
  # screeplot
  
  g = ggplot(eig_df, aes(x = as.factor(x), y = eig)) + geom_bar(stat = "identity", fill = cols$BC[i])+
    xlab("dimensions")+ggtitle(BC)
  g
  
  ggsave(filename = paste0(IO$panels, "S_screeplot_",bc,".pdf"),
         plot = g,
         width = viz$full_width/4,
         height = 0.75*viz$full_width/4,
         scale = viz$scale)
  
  
  # interactive 3D
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~n_days,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~time,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~max,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  plot_ly(mds_df, x = ~X1, y = ~X2, z = ~X3,
          color = ~time_last_symptoms,
          type = "scatter3d", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  # interactive 2D
  
  
  plot_ly(mds_df, x = ~X1, y = ~X2,
          color = ~X3,
          type = "scatter", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  plot_ly(mds_df, x = ~X1, y = ~X3,
          color = ~X2,
          type = "scatter", mode = "markers",
          marker = list(size = 4),
          text = ~paste('ID: ', user_id))
  
  
  
  
  if(bc == "pill"){
    #mds_df$user_id[grep("65035d",mds_df$user_id)]
    user_ids = c("0ceafb77c214fb6e45d069e6c645083a9f7799d0","d0fcb8602f237946a56162f047c8ef3ef254541c",special_pill_user,
                 "73266275a3d6f3898cf58269070edb4d4edd2034","e610274623760e9c4f1a50e0ed5a00f17117ed0b",
                 "65035d05a6ccabaaeb82ecd4e17bde1c096adc2f",
                 "c45cd0e050cf6d704da32cbab899bacf51a519a3")
  }else{
    user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
                 "c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
                 "2eedf35808effb0a40381e11d1c94257096ab8de","cffcd394d8f769d363f67eada7f16b0106c3191a",
                 "b9590a55307d916d4aabc8e386bc76586a28ae0b")
    
    user_ids = c("2ac70e8def770369d086e9249fee9d5b3485ad0d","cb2700ea5a33499b1c97113444e1b2e5d0d09162",
                 "cffcd394d8f769d363f67eada7f16b0106c3191a","b70dde8d03e0f39bdb4cb67c88b0d26ad6002bd1",
                 "c4bf18fef2f8db48d6f4a881b1a23be849cbbc06","34c8890de2da03e29a7f93771ea9ecaf12cf4c88",
                 "2eedf35808effb0a40381e11d1c94257096ab8de")
  }
  
  j_mds = which(mds_df$user_id %in% user_ids)
  mds_df_subset = mds_df[j_mds,]
  mds_df_subset$user_id = factor(as.character(mds_df_subset$user_id), levels = user_ids)
  
  # MDS
  g_12 = ggplot(mds_df, aes(x = X1, y = X2)) + coord_fixed()+
    geom_point(alpha = 0.5, size = 0.3, col = "gray40")+
    geom_point(data = mds_df_subset, aes(x = X1, y = X2, col = user_id), size = 2, alpha = 0.7)+
    scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
    scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
    guides(col = FALSE)
  
  g_13 = ggplot(mds_df, aes(x = X1, y = X3)) + coord_fixed()+
    geom_point(alpha = 0.5, size = 0.3, col = "gray40") +
    geom_point(data = mds_df_subset, aes(x = X1, y = X3, col = user_id), size = 2, alpha = 0.7)+
    scale_x_continuous(breaks = seq(-1,1,by = 0.2))+
    scale_y_continuous(breaks = seq(-1,1,by = 0.2))+
    guides(col = FALSE)
  
  grid.arrange(g_12, g_13, nrow = 1)
  g_mds = arrangeGrob(g_12, g_13, nrow = 2)
  g_mds
  
  ggsave(filename = paste0(IO$panels, "mds_",bc,".pdf"), 
         plot = g_mds, 
         width = viz$full_width/4, 
         height = viz$full_width/2.2,
         scale = viz$scale)
  
    
  ggsave(filename = paste0(IO$panels, "mds_",bc,"_12.pdf"), 
         plot = g_12, 
         width = viz$full_width/4, 
         height = viz$full_width/4,
         scale = viz$scale)
  
    ggsave(filename = paste0(IO$panels, "mds_",bc,"_13.pdf"), 
         plot = g_13, 
         width = viz$full_width/4, 
         height = viz$full_width/4,
         scale = viz$scale)
  
  
  avg_profiles = this_avg_profile_per_user[which(this_avg_profile_per_user$user_id %in%  user_ids),]
  
  c_wide_subset = this_c_wide[which(this_c_wide$user_id %in%  user_ids),]
  avg_profiles = melt(c_wide_subset)
  avg_profiles$avg_tender_breast = avg_profiles$value
  avg_profiles$user_id = factor(as.character(avg_profiles$user_id), levels = user_ids)
  
  g_profiles = ggplot(avg_profiles, aes(x = cycleday_m_D, y = avg_tender_breast, fill = user_id, col = user_id))+
    #geom_point()+
    #geom_line()+
    geom_area(position = "identity", alpha = 0.75)+
    facet_grid(user_id ~ .)+
    guides(col = FALSE, fill = FALSE)+
    theme(strip.text = element_blank())+
    scale_y_continuous(breaks = c(0,0.5,1))+
    scale_x_continuous(breaks = par$x.axis)+
    xlab("cycleday from menstruation")+
    ylab("average breast tenderness")
  g_profiles
  
  ggsave(filename = paste0(IO$panels, "profiles_",bc,".pdf"), 
         plot = g_profiles, 
         width = viz$full_width/4, 
         height = 0.65*viz$full_width/4,
         scale = viz$scale*1.5)
  
}
## Loading objects:
##   mds_df
##   eig_df
##   rtsne_out_df
##   this_avg_profile_per_user
##   this_c_wide
## Loading objects:
##   mds_df
##   eig_df
##   rtsne_out_df
##   this_avg_profile_per_user
##   this_c_wide

Figure 3: PILL TRANSITION

load(file = paste0(IO$out_Rdata,"pill_transition_profiles.Rdata"), verbose = TRUE)
## Loading objects:
##   transition_profiles
transition_profiles$TB_change_cat_simple = factor(transition_profiles$TB_change_cat_simple  , levels = c("increase","no change","decrease"))

agg = unique(transition_profiles[,c("TB_change_cat_simple","transition","n_users")])
agg$cycle_nb_m_from_trans = -4
agg
##      TB_change_cat_simple transition n_users cycle_nb_m_from_trans
## 1                decrease   off pill     534                    -4
## 206              increase   off pill     989                    -4
## 414             no change   off pill     232                    -4
## 622              decrease    on pill    4544                    -4
## 830              increase    on pill    3165                    -4
## 1038            no change    on pill    2146                    -4
agg_sum = aggregate( n_users ~ transition, agg, FUN = sum )
colnames(agg_sum) = c("transition","n_users_tot")
agg = merge(agg, agg_sum, all = TRUE)
agg$perc_users = paste0(round(100*agg$n_users/agg$n_users_tot),"%")
agg$text = paste0(agg$n_users, "\n",agg$perc_users)

g = ggplot(transition_profiles, aes(x = cycleday_m_D, y = fraction_cycles_with_TB, col = BC)) + 
  geom_line()+ facet_grid(transition + TB_change_cat_simple ~ cycle_nb_m_from_trans) +
  scale_x_continuous(breaks = par$x.axis, limits = c(-par$D,par$Df))+ guides(col = FALSE)+
  geom_text(data = agg, aes(x = -par$D, y = 0.25*max(transition_profiles$fraction_cycles_with_TB), col = transition, label = text), 
            nudge_x = 7, nudge_y = 0, size = 3.5)+
  scale_color_manual(values = rep(cols$BC,2))+
  scale_y_continuous(labels = scales::percent)+
  xlab("cycleday from menstruation")+
  ylab("% of cycles with reported breast tenderness")
print(g)

ggsave(filename = paste0(IO$panels, "profiles_pill_transition.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.85*viz$full_width/2,
       scale = viz$scale*1.1)
load(file = paste0(IO$out_Rdata,"table_TB_change_cat_x_BC_df.Rdata"), verbose = TRUE)
## Loading objects:
##   table_TB_change_cat_x_BC_df
load(file = paste0(IO$out_Rdata,"hist_log2_TB_ratio_x_BC.Rdata"), verbose = TRUE)
## Loading objects:
##   hist_log2_TB_ratio_x_BC
g = ggplot(table_TB_change_cat_x_BC_df, aes(x = TB_change_cat_wrap, y = n_users, fill = BC))+
  geom_bar(stat = "identity")+
  facet_grid(BC ~., scale = "free")+
  xlab("")+ylab("# of users")+
  scale_fill_manual(values = cols$BC)
g

g_hist = ggplot(hist_log2_TB_ratio_x_BC, aes(x = log2_TB_ratio, y = n_users, fill = transition))+
  geom_bar(stat = "identity", width = 0.5)+
  facet_grid(transition ~., scale = "free")+
  xlab("log2(TB ratio: after/before)")+ylab("# of users")+
  #geom_vline(xintercept = c(-value_inf+0.5,-1.5,-0.5,0.5,1.5,value_inf-0.5), col = "gray20", linetype = 2, size = 0.2)+
  geom_vline(xintercept = c(-0.5,0.5), col = "gray20", linetype = 2, size = 0.2)+
  geom_vline(xintercept = 0, col = "gray20", linetype = 1, size = 0.5)+
  scale_fill_manual(values = cols$BC)+
  scale_x_continuous(breaks = seq(-6,6,by = 1))
g_hist

ggsave(filename = paste0(IO$panels, "pill_transition_TB_change_cat_x_BC.pdf"), 
       plot = g, 
       width = viz$full_width/2, 
       height = 0.4*viz$full_width/2,
       scale = viz$scale)


ggsave(filename = paste0(IO$panels, "pill_transition_hist_log2_TB_ratio_x_BC.pdf"), 
       plot = g_hist, 
       width = viz$full_width/2, 
       height = 0.33*viz$full_width/2,
       scale = viz$scale)
load(file = paste0(IO$out_Rdata,"days_selected_users_for_pill_transition_examples.Rdata"), verbose = TRUE)
## Loading objects:
##   selected_users_days
selected_users_days$user_id =   selected_users_days$stretch_id

stretch_ids = unique(selected_users_days$stretch_id)
for(stretch_id in stretch_ids){
  cat("\t\t",stretch_id,"\n")
  this_stretch_days = selected_users_days[selected_users_days$stretch_id == stretch_id,]
  trans = unique(this_stretch_days$transition)
  g = ggplot_user_history(this_stretch_days, pill_transition = trans, print_x_lab = FALSE, print_title = FALSE, make_x_axis_symmetrical = TRUE)
  print(g)
  
  ggsave(filename = paste0(IO$panels, "profiles_pill_transition_example_",stretch_id,".pdf"), 
         plot = g, 
         width = viz$full_width/2, 
         height = 0.18*viz$full_width/2,
         scale = viz$scale*1.2)
  
  
}
##       user_1

##       user_2

##       user_3

##       user_4

##       user_5

##       user_6

##       user_7

##       user_8

##       user_9

##       user_10